Click to show code
print('hello')
#> [1] "hello"The following machine learning project focuses on…
print('hello')
#> [1] "hello"# Python code
import numpy as np
print(np.mean([10, 20, 30, 40, 50]))
#> 30.0# Load the CSV file into a data frame
properties <- read.csv(file.path(here(),"data/properties.csv"))
# Identify values causing the issue
problematic_values <- properties$number_of_rooms[is.na(as.numeric(properties$number_of_rooms))]
# Replace non-numeric values with NA
properties$number_of_rooms <- as.numeric(gsub("[^0-9.]", "", properties$number_of_rooms))
# Remove non-numeric characters and convert to numeric
properties$price <- as.numeric(gsub("[^0-9]", "", properties$price))
# Subset the dataset to exclude rows with price < 20000
properties <- properties[properties$price >= 20000, ]
# Subset the dataset to exclude rows with numbers of rooms < 25
properties <- properties[properties$number_of_rooms <25, ]
# Replace incomplete addresses
properties$address <- gsub("^\\W*[.,0-]\\W*", "", properties$address)
properties_filtered <- na.omit(properties)
properties_filtered$year_category <- substr(properties_filtered$year_category, 1, 9)
# Assuming 'year_category' is a column in the 'properties' dataset
properties_filtered$year_category <- as.factor(properties_filtered$year_category)
# Preprocess the number_of_rooms column
properties_filtered$number_of_rooms <- as.character(properties_filtered$number_of_rooms)
properties_filtered$number_of_rooms <- gsub("\\D", "", properties_filtered$number_of_rooms) # Remove non-numeric characters
properties_filtered$number_of_rooms <- as.numeric(properties_filtered$number_of_rooms) # Convert to numeric
properties_filtered$number_of_rooms <- trunc(properties_filtered$number_of_rooms) # Truncate non-integer values
# add majuscule to canton
properties_filtered$canton <- tools::toTitleCase(properties_filtered$canton)
head(properties_filtered)
#> price number_of_rooms address canton property_type
#> 1 1800000 65 1844 Villeneuve VD Vaud Apartment
#> 2 1980000 55 1820 Montreux Vaud Apartment
#> 3 488000 35 1882 Gryon Vaud Apartment
#> 4 1755000 7 1820 Montreux Vaud Apartment
#> 5 650000 25 1815 Clarens Vaud Apartment
#> 6 1490000 45 1260 Nyon Vaud Apartment
#> floor year_category
#> 1 eg 0-1919
#> 2 eg 0-1919
#> 3 eg 0-1919
#> 4 eg 0-1919
#> 5 eg 0-1919
#> 6 eg 0-1919head(properties_filtered)
#> price number_of_rooms address canton property_type
#> 1 1800000 65 1844 Villeneuve VD Vaud Apartment
#> 2 1980000 55 1820 Montreux Vaud Apartment
#> 3 488000 35 1882 Gryon Vaud Apartment
#> 4 1755000 7 1820 Montreux Vaud Apartment
#> 5 650000 25 1815 Clarens Vaud Apartment
#> 6 1490000 45 1260 Nyon Vaud Apartment
#> floor year_category
#> 1 eg 0-1919
#> 2 eg 0-1919
#> 3 eg 0-1919
#> 4 eg 0-1919
#> 5 eg 0-1919
#> 6 eg 0-1919
df <- properties_filtered
#the address column is like : '1844 Villeneuve VD' and has zip code number in it
#taking out the zip code number and creating a new column 'zip_code'
#the way to identify the zip code is to identify numbers that are 4 digits long
df$zip_code <- as.numeric(gsub("\\D", "", df$address))
#removing the first two number of zip code has more than 4 number
df$zip_code <- ifelse(df$zip_code > 9999, df$zip_code %% 10000, df$zip_code)
head(df)
#> price number_of_rooms address canton property_type
#> 1 1800000 65 1844 Villeneuve VD Vaud Apartment
#> 2 1980000 55 1820 Montreux Vaud Apartment
#> 3 488000 35 1882 Gryon Vaud Apartment
#> 4 1755000 7 1820 Montreux Vaud Apartment
#> 5 650000 25 1815 Clarens Vaud Apartment
#> 6 1490000 45 1260 Nyon Vaud Apartment
#> floor year_category zip_code
#> 1 eg 0-1919 1844
#> 2 eg 0-1919 1820
#> 3 eg 0-1919 1882
#> 4 eg 0-1919 1820
#> 5 eg 0-1919 1815
#> 6 eg 0-1919 1260#read .csv AMTOVZ_CSV_LV95
amto <- read.csv(file.path(here(),"data/AMTOVZ_CSV_LV95.csv"), sep = ";")
head(amto)
#> Ortschaftsname PLZ Zusatzziffer Gemeindename BFS.Nr
#> 1 Aeugst am Albis 8914 0 Aeugst am Albis 1
#> 2 Aeugstertal 8914 2 Aeugst am Albis 1
#> 3 Zwillikon 8909 0 Affoltern am Albis 2
#> 4 Affoltern am Albis 8910 0 Affoltern am Albis 2
#> 5 Bonstetten 8906 0 Bonstetten 3
#> 6 Sihlbrugg 6340 4 Hausen am Albis 4
#> Kantonskürzel E N Sprache Validity
#> 1 ZH 2679403 1235842 de 2008-07-01
#> 2 ZH 2679815 1237404 de 2008-07-01
#> 3 ZH 2675280 1238108 de 2008-07-01
#> 4 ZH 2676852 1236930 de 2008-07-01
#> 5 ZH 2677412 1241078 de 2008-07-01
#> 6 ZH 2686082 1230649 de 2008-07-01
#creating a new dataframe with 'Ortschaftsname' as 'City'Place_name', 'PLZ' as 'zip_code' and 'KantonskÃ.rzel' as 'Canton_code'
amto_df <- data.frame(City=amto$Ortschaftsname, zip_code=amto$PLZ, Canton_code=amto$Kantonskürzel)
head(amto_df)
#> City zip_code Canton_code
#> 1 Aeugst am Albis 8914 ZH
#> 2 Aeugstertal 8914 ZH
#> 3 Zwillikon 8909 ZH
#> 4 Affoltern am Albis 8910 ZH
#> 5 Bonstetten 8906 ZH
#> 6 Sihlbrugg 6340 ZH#merge the two dataframes 'df' and 'amto_df' on 'zip_code'
df <- merge(df, amto_df, by='zip_code', all.x=TRUE)
#check if there are nan in city
df[is.na(df$City),]
#> zip_code price number_of_rooms
#> 1 25 2200000 10
#> 2 25 2200000 65
#> 3 26 1995000 75
#> 4 26 870490 45
#> 5 322 870000 25
#> 6 322 1295770 45
#> 2253 1200 2450000 6
#> 2254 1200 982130 45
#> 11886 1919 2535730 55
#> 11887 1919 230000 15
#> 11888 1919 1415380 35
#> 11889 1919 1043260 45
#> 11890 1919 2535730 55
#> 17993 2500 1050000 45
#> 17994 2500 1100000 5
#> 17995 2500 887500 55
#> 17996 2500 870500 45
#> 17997 2500 1176820 45
#> 17998 2500 1159550 35
#> 17999 2500 1927050 45
#> 18000 2500 892500 45
#> 18001 2500 887500 45
#> 18002 2500 420000 45
#> 18003 2500 877500 45
#> 18004 2500 885500 55
#> 18005 2500 872500 45
#> 19603 3000 1448610 45
#> 19604 3000 1515060 45
#> 19605 3000 956880 45
#> 19606 3000 1222680 35
#> 19607 3000 1448610 45
#> 19608 3000 1448610 45
#> 19609 3000 1515060 45
#> 19610 3000 820000 55
#> 19611 3000 1222680 35
#> 19612 3000 1590000 55
#> 19613 3000 1448610 45
#> 27169 4000 2100000 65
#> 27170 4000 975000 45
#> 30708 5201 963520 45
#> 33490 6511 584760 3
#> 33927 6547 19935000 55
#> 35207 6602 270000 15
#> 35208 6602 3721200 55
#> 35209 6602 3721200 55
#> 35210 6604 2644710 35
#> 35211 6604 2644710 35
#> 35212 6604 1142940 45
#> 35213 6604 610000 25
#> 35214 6604 810690 35
#> 35215 6604 860000 35
#> 35216 6604 917010 45
#> 35217 6604 1010040 45
#> 40817 6901 3628170 45
#> 40818 6911 877140 55
#> 40819 6911 810690 45
#> 40820 6911 730950 45
#> 40821 6911 465150 35
#> 42848 7133 2246010 35
#> 42861 7135 3575010 65
#> 43231 8000 1295770 35
#> 43232 8000 2100000 45
#> 43233 8000 2495000 55
#> 44144 8238 739000 35
#> 44145 8238 739000 35
#> 44146 8238 716000 35
#> 44147 8238 716000 35
#> 44148 8238 325600 3
#> 44889 8423 2910510 45
#> 44890 8423 2804190 55
#> 47001 9002 3787650 45
#> 47621 9241 724300 35
#> address
#> 1 1000 Lausanne 25
#> 2 1000 Lausanne 25
#> 3 Lausanne 26, 1000 Lausanne 26
#> 4 1000 Lausanne 26
#> 5 Via Cuolm Liung 30d, 7032 Laax GR 2
#> 6 Via Murschetg 29, 7032 Laax GR 2
#> 2253 1200 Genève
#> 2254 Chemin des pralets, 74100 Etrembières, 1200 Genève
#> 11886 1919 Martigny
#> 11887 1919 Martigny
#> 11888 1919 Martigny
#> 11889 1919 Martigny
#> 11890 1919 Martigny
#> 17993 Hohlenweg 11b, 2500 Biel/Bienne
#> 17994 2500 Biel/Bienne
#> 17995 2500 Biel/Bienne
#> 17996 2500 Biel/Bienne
#> 17997 2500 Biel/Bienne
#> 17998 2500 Biel/Bienne
#> 17999 2500 Bienne
#> 18000 2500 Biel/Bienne
#> 18001 2500 Biel/Bienne
#> 18002 2500 Biel/Bienne
#> 18003 2500 Biel/Bienne
#> 18004 2500 Biel/Bienne
#> 18005 2500 Biel/Bienne
#> 19603 3000 Bern
#> 19604 3000 Bern
#> 19605 3000 Bern
#> 19606 3000 Bern
#> 19607 3000 Bern
#> 19608 3000 Bern
#> 19609 3000 Bern
#> 19610 3000 Bern
#> 19611 3000 Bern
#> 19612 3000 Bern
#> 19613 3000 Bern
#> 27169 4000 Basel
#> 27170 4000 Basel
#> 30708 5201 Brugg AG
#> 33490 6511 Cadenazzo
#> 33927 Augio 1F, 6547 Augio
#> 35207 6602 Muralto
#> 35208 6602 Muralto
#> 35209 6602 Muralto
#> 35210 6604 Solduno
#> 35211 6604 Solduno
#> 35212 6604 Solduno
#> 35213 6604 Solduno
#> 35214 6604 Solduno
#> 35215 6604 Solduno
#> 35216 6604 Locarno
#> 35217 6604 Locarno
#> 40817 6901 Lugano
#> 40818 6911 Campione d'Italia
#> 40819 6911 Campione d'Italia
#> 40820 6911 Campione d'Italia
#> 40821 6911 Campione d'Italia
#> 42848 Inder Platenga 34, 7133 Obersaxen
#> 42861 7135 Fideris
#> 43231 8000 Zürich
#> 43232 8000 Zürich
#> 43233 8000 Zürich
#> 44144 8238 Büsingen am Hochrhein
#> 44145 8238 Büsingen am Hochrhein
#> 44146 Junkerstrasse 85, 8238 Büsingen am Hochrhein
#> 44147 Junkerstrasse 85, 8238 Büsingen am Hochrhein
#> 44148 Stemmerstrasse 14, 8238 Büsingen am Hochrhein
#> 44889 Chüngstrasse 48, 8423 Embrach
#> 44890 Chüngstrasse 60, 8423 Embrach
#> 47001 6900 Lugano 2 Paradiso Caselle
#> 47621 9241 Kradolf
#> canton property_type floor year_category City
#> 1 Vaud Single house 1919-1945 <NA>
#> 2 Vaud Villa 2006-2010 <NA>
#> 3 Vaud Villa 1961-1970 <NA>
#> 4 Vaud Apartment noteg 2016-2024 <NA>
#> 5 Grisons Apartment eg 2016-2024 <NA>
#> 6 Grisons Apartment noteg 2011-2015 <NA>
#> 2253 Geneva Bifamiliar house 1981-1990 <NA>
#> 2254 Geneva Bifamiliar house 2016-2024 <NA>
#> 11886 Valais Attic flat noteg 2016-2024 <NA>
#> 11887 Valais Apartment eg 2016-2024 <NA>
#> 11888 Valais Apartment noteg 2016-2024 <NA>
#> 11889 Valais Apartment noteg 2016-2024 <NA>
#> 11890 Valais Apartment noteg 2016-2024 <NA>
#> 17993 Bern Single house 2001-2005 <NA>
#> 17994 Bern Single house 2001-2005 <NA>
#> 17995 Bern Single house 2016-2024 <NA>
#> 17996 Bern Single house 2016-2024 <NA>
#> 17997 Bern Villa 2016-2024 <NA>
#> 17998 Bern Villa 2016-2024 <NA>
#> 17999 Bern Single house 2016-2024 <NA>
#> 18000 Bern Single house 2016-2024 <NA>
#> 18001 Bern Single house 2016-2024 <NA>
#> 18002 Bern Apartment noteg 1971-1980 <NA>
#> 18003 Bern Single house 2016-2024 <NA>
#> 18004 Bern Single house 2016-2024 <NA>
#> 18005 Bern Single house 2016-2024 <NA>
#> 19603 Bern Apartment eg 2016-2024 <NA>
#> 19604 Bern Apartment eg 2016-2024 <NA>
#> 19605 Bern Apartment eg 2016-2024 <NA>
#> 19606 Bern Apartment noteg 2016-2024 <NA>
#> 19607 Bern Apartment noteg 2016-2024 <NA>
#> 19608 Bern Apartment eg 2016-2024 <NA>
#> 19609 Bern Apartment eg 2016-2024 <NA>
#> 19610 Bern Apartment noteg 2016-2024 <NA>
#> 19611 Bern Duplex noteg 2016-2024 <NA>
#> 19612 Bern Apartment noteg 1991-2000 <NA>
#> 19613 Bern Roof flat noteg 2016-2024 <NA>
#> 27169 Basel-Stadt Villa 2016-2024 <NA>
#> 27170 Basel-Stadt Single house 2016-2024 <NA>
#> 30708 Aargau Apartment noteg 2016-2024 <NA>
#> 33490 Ticino Apartment noteg 2016-2024 <NA>
#> 33927 Grisons Single house 2016-2024 <NA>
#> 35207 Ticino Apartment eg 1961-1970 <NA>
#> 35208 Ticino Single house 1981-1990 <NA>
#> 35209 Ticino Single house 1981-1990 <NA>
#> 35210 Ticino Attic flat noteg 2011-2015 <NA>
#> 35211 Ticino Apartment noteg 2011-2015 <NA>
#> 35212 Ticino Apartment noteg 2016-2024 <NA>
#> 35213 Ticino Apartment noteg 2016-2024 <NA>
#> 35214 Ticino Apartment noteg 2016-2024 <NA>
#> 35215 Ticino Apartment noteg 2016-2024 <NA>
#> 35216 Ticino Apartment noteg 2011-2015 <NA>
#> 35217 Ticino Apartment noteg 2011-2015 <NA>
#> 40817 Ticino Attic flat noteg 2011-2015 <NA>
#> 40818 Ticino Single house 1971-1980 <NA>
#> 40819 Ticino Apartment eg 1946-1960 <NA>
#> 40820 Ticino Apartment noteg 1991-2000 <NA>
#> 40821 Ticino Apartment noteg 1946-1960 <NA>
#> 42848 Grisons Single house 2006-2010 <NA>
#> 42861 Grisons Single house 0-1919 <NA>
#> 43231 Zurich Single house 2016-2024 <NA>
#> 43232 Zurich Apartment noteg 2016-2024 <NA>
#> 43233 Zurich Apartment noteg 0-1919 <NA>
#> 44144 Schaffhausen Apartment eg 2016-2024 <NA>
#> 44145 Schaffhausen Attic flat eg 2016-2024 <NA>
#> 44146 Schaffhausen Attic flat noteg 2016-2024 <NA>
#> 44147 Schaffhausen Apartment noteg 2016-2024 <NA>
#> 44148 Schaffhausen Apartment noteg 1961-1970 <NA>
#> 44889 Zurich Single house 2016-2024 <NA>
#> 44890 Zurich Bifamiliar house 2016-2024 <NA>
#> 47001 Ticino Apartment noteg 2011-2015 <NA>
#> 47621 Thurgau Apartment noteg 1991-2000 <NA>
#> Canton_code
#> 1 <NA>
#> 2 <NA>
#> 3 <NA>
#> 4 <NA>
#> 5 <NA>
#> 6 <NA>
#> 2253 <NA>
#> 2254 <NA>
#> 11886 <NA>
#> 11887 <NA>
#> 11888 <NA>
#> 11889 <NA>
#> 11890 <NA>
#> 17993 <NA>
#> 17994 <NA>
#> 17995 <NA>
#> 17996 <NA>
#> 17997 <NA>
#> 17998 <NA>
#> 17999 <NA>
#> 18000 <NA>
#> 18001 <NA>
#> 18002 <NA>
#> 18003 <NA>
#> 18004 <NA>
#> 18005 <NA>
#> 19603 <NA>
#> 19604 <NA>
#> 19605 <NA>
#> 19606 <NA>
#> 19607 <NA>
#> 19608 <NA>
#> 19609 <NA>
#> 19610 <NA>
#> 19611 <NA>
#> 19612 <NA>
#> 19613 <NA>
#> 27169 <NA>
#> 27170 <NA>
#> 30708 <NA>
#> 33490 <NA>
#> 33927 <NA>
#> 35207 <NA>
#> 35208 <NA>
#> 35209 <NA>
#> 35210 <NA>
#> 35211 <NA>
#> 35212 <NA>
#> 35213 <NA>
#> 35214 <NA>
#> 35215 <NA>
#> 35216 <NA>
#> 35217 <NA>
#> 40817 <NA>
#> 40818 <NA>
#> 40819 <NA>
#> 40820 <NA>
#> 40821 <NA>
#> 42848 <NA>
#> 42861 <NA>
#> 43231 <NA>
#> 43232 <NA>
#> 43233 <NA>
#> 44144 <NA>
#> 44145 <NA>
#> 44146 <NA>
#> 44147 <NA>
#> 44148 <NA>
#> 44889 <NA>
#> 44890 <NA>
#> 47001 <NA>
#> 47621 <NA>We have 144 NAN, where
Removed them ::: {.cell layout-align=“center”}
#remove the rows with nan in city
properties_filtered <- df[!is.na(df$City),]:::
#excel file modified for fribourg and liestal
# read csv
impots <- read.csv(file.path(here(),"data/statistik-steuerfuesse-np-1995-2023-fr.csv"), sep = ",", header = TRUE, stringsAsFactors = FALSE)
# Remove last 13 rows
impots <- head(impots, -14)
# Rename columns
colnames(impots) <- c("Chefs-lieux cantonaux", "Impôt cantonal", "Impôt communal", "Impôt paroissial Evang. / réform", "Impôt paroissial Kath. / cath.rom")
# Set row names to the content of the first column
rownames(impots) <- impots$`Chefs-lieux cantonaux`
# Remove the first column (which is now row names)
impots1 <- impots[, -1, drop = FALSE]
# Set the row names for impots1 the same as impots
rownames(impots1) <- rownames(impots)
impots <- impots1
rm(impots1)
# Write data to CSV based on here()
write.csv(impots, file.path(here(),"data/impots.csv"), row.names = TRUE)# Create a copy of properties_filtered
properties_merged <- properties_filtered
# Loop through unique canton names in properties_filtered
for (canton_name in unique(properties_filtered$canton)) {
# Check if canton_name exists as a row name in impots
if (canton_name %in% rownames(impots)) {
# Get the corresponding row from impots
impots_row <- impots[canton_name, ]
# Add columns to properties_merged with impots content
for (col_name in names(impots)) {
properties_merged[properties_merged$canton == canton_name, paste0(col_name, "_impots")] <- impots_row[[col_name]]
}
# Remove impots_row from memory
rm(impots_row)
}
}
# Write data to CSV based on here()
write.csv(properties_merged, file.path(here(),"data/properties_merged.csv"), row.names = TRUE)##Dataset used for the rest of the analysis ::: {.cell layout-align=“center”}
properties_filtered <- properties_merged:::
Replaces NAs in both Taux de couverture social and Political (Conseil National Datas) For Taux de couverture Social: NAs were due to reason “Q” = “Not indicated to protect confidentiality” We replaced the NAs by the average taux de couverture in Switzerland in 2019, which was 3.2%
For Political data: NAs were due to reason “M” = “Not indicated because data was not important or applicable” Therefore, we replaced the NAs by 0
# il faudra changer le path
commune_prep <- read.csv(file.path(here(),"data/commune_data.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
# We keep only 2019 to have some reference? (2020 is apparently not really complete)
commune_2019 <- subset(commune_prep, PERIOD_REF == "2019") %>%
select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE", "STATUS"))
# delete les lignes ou Status = Q ou M (pas de valeur) et ensuite on enlève la colonne
commune_2019 <- subset(commune_2019, STATUS == "A") %>%
select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE"))
# on enlève les lignes qui sont des aggrégats
commune_2019 <- subset(commune_2019, REGION != "Schweiz")
commune_2019 <- commune_2019 %>%
pivot_wider(names_from = INDICATORS, values_from = VALUE)
# Rename columns using the provided map
df_commune <- commune_2019 %>%
rename(`Population - Habitants` = Ind_01_01,
`Population - Densité de la population` = Ind_01_03,
`Population - Etrangers` = Ind_01_08,
`Population - Part du groupe d'âge 0-19 ans` = Ind_01_04,
`Population - Part du groupe d'âge 20-64 ans` = Ind_01_05,
`Population - Part du groupe d'âge 65+ ans` = Ind_01_06,
`Population - Taux brut de nuptialité` = Ind_01_09,
`Population - Taux brut de divortialité` = Ind_01_10,
`Population - Taux brut de natalité` = Ind_01_11,
`Population - Taux brut de mortalité` = Ind_01_12,
`Population - Ménages privés` = Ind_01_13,
`Population - Taille moyenne des ménages` = Ind_01_14,
`Sécurité sociale - Taux d'aide sociale` = Ind_11_01,
`Conseil national - PLR` = Ind_14_01,
`Conseil national - PDC` = Ind_14_02,
`Conseil national - PS` = Ind_14_03,
`Conseil national - UDC` = Ind_14_04,
`Conseil national - PEV/PCS` = Ind_14_05,
`Conseil national - PVL` = Ind_14_06,
`Conseil national - PBD` = Ind_14_07,
`Conseil national - PST/Sol.` = Ind_14_08,
`Conseil national - PES` = Ind_14_09,
`Conseil national - Petits partis de droite` = Ind_14_10)
# If no one voted for a party, set as NA -> replacing it with 0 instead
df_commune <- df_commune %>%
mutate_at(vars(starts_with("Conseil national")), ~replace_na(., 0))
# Removing NAs from Taux de couverture sociale column
# Setting the mean as the mean for Switzerland in 2019 (3.2%)
mean_taux_aide_social <- 3.2
# Replace NA values with the mean
df_commune <- df_commune %>%
mutate(`Sécurité sociale - Taux d'aide sociale` = if_else(is.na(`Sécurité sociale - Taux d'aide sociale`), mean_taux_aide_social, `Sécurité sociale - Taux d'aide sociale`))# Load required libraries
library(ggplot2)
library(plotly)
library(here)
library(readxl)
library(dplyr)
library(tidyr)histogram_price <- ggplot(properties_filtered, aes(x = price)) +
geom_histogram(binwidth = 100000, fill = "skyblue", color = "red") +
labs(title = "Distribution of Prices",
x = "Price",
y = "Frequency") +
theme_minimal()
# Convert ggplot object to plotly object
interactive_histogram_price <- ggplotly(histogram_price)
# Display the interactive histogram
interactive_histogram_pricenote : only price between 0 and 500000 so some outliers aren’t here
# Create the ggplot object
histogram <- ggplot(properties_filtered, aes(x = price)) +
geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
facet_wrap(~ property_type, scales = "free", ncol = 2) +
labs(title = "Distribution of Prices by Property Type",
x = "Price",
y = "Frequency") +
theme_minimal() +
xlim(0, 5000000)
# Convert ggplot object to plotly object
interactive_histogram <- ggplotly(histogram)
# Display the interactive plot
interactive_histogramnote : only price between 0 and 500000 so some outliers aren’t here
# Create a histogram of prices for each year category
histogram <- ggplot(properties_filtered, aes(x = price)) +
geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
facet_wrap(~ year_category, scales = "free", ncol = 2) +
labs(title = "Distribution of Prices by Year Category",
x = "Price",
y = "Frequency") +
theme_minimal() +
xlim(0, 5000000)
# Convert ggplot object to plotly object
interactive_histogram_year <- ggplotly(histogram)
# Display the interactive plot
interactive_histogram_yearnote : only price between 0 and 500000 so some outliers aren’t here
histogram <- ggplot(properties_filtered, aes(x = price)) +
geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
facet_wrap(~ canton, scales = "free", ncol = 2) +
labs(title = "Distribution of Prices by Canton",
x = "Price",
y = "Frequency") +
theme_minimal() +
xlim(0, 5000000)
# Convert ggplot object to plotly object with adjusted height
interactive_histogram <- ggplotly(histogram) %>%
layout(height = 1000) # Adjust the height as needed
# Display the interactive plot
interactive_histogramnote : only price between 0 and 500000 so some outliers aren’t here
and the graph below only show apartments with less than 10 rooms (but you can change the code if needed
properties_room <- properties_filtered[properties_filtered$number_of_rooms < 20, ] # Filter only number_of_rooms less than 20
# Create a histogram of prices for each number of rooms
histogram <- ggplot(properties_room, aes(x = price)) +
geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
facet_wrap(~ number_of_rooms, scales = "free", ncol = 2) +
labs(title = "Distribution of Prices by Number of Rooms",
x = "Price",
y = "Frequency") +
theme_minimal() +
xlim(0, 5000000)
# Convert ggplot object to plotly object with adjusted height
interactive_histogram <- ggplotly(histogram) %>%
layout(height = 1000) # Adjust the height as needed
# Display the interactive plot
interactive_histogramcolnames(properties_filtered)[(ncol(properties_filtered) - 3):ncol(properties_filtered)] <- gsub("\\s+", "_", colnames(properties_filtered)[(ncol(properties_filtered) - 3):ncol(properties_filtered)])
# Create a scatter plot to visualize correlation between price and Impôt cantonal
scatter_plot <- ggplot(properties_filtered, aes(x = price, y = Impôt_cantonal_impots)) +
geom_point() +
labs(title = "Correlation between Price and Impôt cantonal",
x = "Price",
y = "Impôt cantonal") +
theme_minimal()
# Convert ggplot object to plotly object
interactive_plot <- ggplotly(scatter_plot)
# Display the interactive plot
interactive_plot# Perform multiple linear regression
model <- lm(price ~ number_of_rooms + canton + property_type + year_category, data = properties)
# Summarize the regression model
summary(model)
#>
#> Call:
#> lm(formula = price ~ number_of_rooms + canton + property_type +
#> year_category, data = properties)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -7013788 -514438 -138948 264464 21628996
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -677158 55739 -12.15 < 2e-16
#> number_of_rooms 337946 6166 54.81 < 2e-16
#> cantonappenzell-ausser-rhoden -464945 126861 -3.66 0.00025
#> cantonappenzell-inner-rhoden -874289 392590 -2.23 0.02596
#> cantonbasel-landschaft -195701 57943 -3.38 0.00073
#> cantonbasel-stadt 218682 105130 2.08 0.03753
#> cantonbern -478376 46221 -10.35 < 2e-16
#> cantonfribourg -781416 48366 -16.16 < 2e-16
#> cantongeneva 2025260 62234 32.54 < 2e-16
#> cantonglarus -573694 173301 -3.31 0.00093
#> cantongrisons 59982 71666 0.84 0.40262
#> cantonjura -801519 77323 -10.37 < 2e-16
#> cantonlucerne -187979 73261 -2.57 0.01030
#> cantonneuchatel -353635 65590 -5.39 7.1e-08
#> cantonnidwalden 991055 244826 4.05 5.2e-05
#> cantonobwalden 366062 244712 1.50 0.13470
#> cantonschaffhausen -584997 120601 -4.85 1.2e-06
#> cantonschwyz 18070 132558 0.14 0.89157
#> cantonsolothurn -784557 61024 -12.86 < 2e-16
#> cantonst-gallen -404890 55918 -7.24 4.6e-13
#> cantonthurgau -37337 63444 -0.59 0.55620
#> cantonticino 125913 38499 3.27 0.00108
#> cantonuri 9578 155772 0.06 0.95097
#> cantonvalais -219964 39781 -5.53 3.3e-08
#> cantonvaud 89914 40258 2.23 0.02553
#> cantonzug 801241 153896 5.21 1.9e-07
#> cantonzurich 316099 49688 6.36 2.0e-10
#> property_typeAttic flat 311019 45964 6.77 1.4e-11
#> property_typeBifamiliar house 41841 42939 0.97 0.32986
#> property_typeChalet 1136804 56690 20.05 < 2e-16
#> property_typeDuplex -5091 56699 -0.09 0.92846
#> property_typeFarm house 237939 118848 2.00 0.04529
#> property_typeLoft 285442 291977 0.98 0.32827
#> property_typeRoof flat 4801 64587 0.07 0.94074
#> property_typeRustic house -281265 249068 -1.13 0.25880
#> property_typeSingle house 389066 24252 16.04 < 2e-16
#> property_typeTerrace flat 88662 87071 1.02 0.30856
#> property_typeVilla 1278283 38187 33.47 < 2e-16
#> year_category1919-1945 10462 61602 0.17 0.86515
#> year_category1946-1960 76025 57261 1.33 0.18429
#> year_category1961-1970 232055 48444 4.79 1.7e-06
#> year_category1971-1980 210609 43422 4.85 1.2e-06
#> year_category1981-1990 237789 43679 5.44 5.3e-08
#> year_category1991-2000 477554 45385 10.52 < 2e-16
#> year_category2001-2005 519338 55369 9.38 < 2e-16
#> year_category2006-2010 591351 48030 12.31 < 2e-16
#> year_category2011-2015 724194 47219 15.34 < 2e-16
#> year_category2016-2024 641233 36926 17.37 < 2e-16
#>
#> (Intercept) ***
#> number_of_rooms ***
#> cantonappenzell-ausser-rhoden ***
#> cantonappenzell-inner-rhoden *
#> cantonbasel-landschaft ***
#> cantonbasel-stadt *
#> cantonbern ***
#> cantonfribourg ***
#> cantongeneva ***
#> cantonglarus ***
#> cantongrisons
#> cantonjura ***
#> cantonlucerne *
#> cantonneuchatel ***
#> cantonnidwalden ***
#> cantonobwalden
#> cantonschaffhausen ***
#> cantonschwyz
#> cantonsolothurn ***
#> cantonst-gallen ***
#> cantonthurgau
#> cantonticino **
#> cantonuri
#> cantonvalais ***
#> cantonvaud *
#> cantonzug ***
#> cantonzurich ***
#> property_typeAttic flat ***
#> property_typeBifamiliar house
#> property_typeChalet ***
#> property_typeDuplex
#> property_typeFarm house *
#> property_typeLoft
#> property_typeRoof flat
#> property_typeRustic house
#> property_typeSingle house ***
#> property_typeTerrace flat
#> property_typeVilla ***
#> year_category1919-1945
#> year_category1946-1960
#> year_category1961-1970 ***
#> year_category1971-1980 ***
#> year_category1981-1990 ***
#> year_category1991-2000 ***
#> year_category2001-2005 ***
#> year_category2006-2010 ***
#> year_category2011-2015 ***
#> year_category2016-2024 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1240000 on 21363 degrees of freedom
#> (72 observations deleted due to missingness)
#> Multiple R-squared: 0.323, Adjusted R-squared: 0.321
#> F-statistic: 216 on 47 and 21363 DF, p-value: <2e-16